{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "s1=\"The Lazarus Project (2008)\"\n",
    "s2=\"The Cry (Il Grido) (1957)\"\n",
    "s3=\"A Hell of a Day (Reines d'un jour) (2001)\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "def format(original_title_with_leading_article):\n",
    "    articles = [\"The\",\"A\",\"An\"]\n",
    "    \n",
    "    # e.g.: \"The Lazarus Project (2008)\"\n",
    "    pat_no_subtitle = r'^([^\\(]+) (\\(\\d{4}\\))$'\n",
    "    \n",
    "    # e.g. \"A Hell of a Day (Reines d'un jour) (2001)\"\n",
    "    pat_with_subtitle = r'^([^\\(]+) (\\([^\\(]+\\)) (\\(\\d{4}\\))$'\n",
    "    \n",
    "    pat_first_word_w_space = r'^(\\w+ )(.+)$'\n",
    "    \n",
    "    for article in articles:\n",
    "        if original_title_with_leading_article.startswith(article):\n",
    "            \n",
    "            # option 1 - no subtitles\n",
    "            match = re.match(pat_no_subtitle,original_title_with_leading_article)\n",
    "                \n",
    "            if match:\n",
    "                title = match.group(1)\n",
    "                year = match.group(2)\n",
    "                                \n",
    "                first_word = re.match(pat_first_word_w_space,title).group(1)\n",
    "                \n",
    "                title_no_article = re.sub(pat_first_word_w_space,r'\\2',title)\n",
    "                \n",
    "                title_article_in_front = title_no_article+\", \"+first_word\n",
    "                \n",
    "                formatted_title = title_article_in_front+year\n",
    "                \n",
    "                return '\"'+formatted_title+'\"'\n",
    "                \n",
    "            # option 2 - with subtitles\n",
    "            match = re.match(pat_with_subtitle,original_title_with_leading_article)\n",
    "            \n",
    "            if match:\n",
    "                main_title = match.group(1)\n",
    "                subtitle = match.group(2)\n",
    "                year = match.group(3)\n",
    "                \n",
    "                first_word = re.match(pat_first_word_w_space,main_title).group(1)\n",
    "                \n",
    "                main_title_no_article = re.sub(pat_first_word_w_space,r'\\2',main_title)\n",
    "                \n",
    "                main_title_article_in_front = main_title_no_article+\", \"+first_word\n",
    "                \n",
    "                formatted_title = main_title_article_in_front+subtitle+\" \"+ year\n",
    "                \n",
    "                return '\"'+formatted_title+'\"'                \n",
    "                \n",
    "                \n",
    "    # no match, return original\n",
    "    return original_title_with_leading_article"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\"Cry, The (Il Grido) (1957)\"'"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "format(s2)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Global TF Kernel (Python 3)",
   "language": "python",
   "name": "global-tf-python-3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
